###############################################################################-
# Author: Pietryka
# Contact: matthew.pietryka@gmail.com
# Purpose: create Table 5: Comparing Women and Men in Turnout Similarity
# Notes:
###############################################################################-

# Load Packages  =====================

library(dplyr)
library(tidyr)
library(readr)
library(purrr)
library(flextable)
library(officer)
library(stringr)

# Load Data ==================================

# Both inputs are read from paths relative to the current working directory.
# Estimates by gender; the table code below reads its gender_f,
# observed_estimate, sim_estimate_* and p_value* columns.
estimates_gender_df <- read_rds("data-files/estimates_gender_df.rds")
# Women-minus-men differences; the table code below reads its
# female_minus_male_* columns.
gender_diff_df <- read_rds("data-files/gender_diff_df.rds")

# NOTE(review): `the_typeface`, used in set_flextable_defaults() below, is not
# defined anywhere in this file -- presumably this sourced script creates it;
# confirm.
source("Plot Preferences.R")


# Functions to create tables ======================


# Query the flextable defaults currently in effect. The result is not stored;
# the call only shows what is about to be overridden.
get_flextable_defaults()

# Defaults applied to every flextable created after this point: black 9pt text
# in the project typeface.
# NOTE(review): `the_typeface` is not defined in this file -- presumably it
# comes from the sourced "Plot Preferences.R"; confirm.
set_flextable_defaults(
  font.size = 9,
  font.color = "black",
  font.family = the_typeface
)

# Heavier, darker rule for the outer edges of a table
big_border <- fp_border(width = 1, color = "grey50")

# Lighter, thinner rule for the lines between body rows
std_border <- fp_border(width = 0.5, color = "grey75")

# Turn a data frame into a flextable after rounding every double column to
# two decimal places.
#
# df: a data frame.
# Returns the flextable built from the rounded data.
make_table <- function(df) {
  round_two <- function(x) round(x, digits = 2)
  rounded_df <- mutate(df, across(where(is.double), round_two))
  flextable(rounded_df)
}


# Apply the shared table styling to a flextable.
#
# tab: a flextable whose header has at least two rows (header rows 1 and 2
#      are addressed explicitly below).
# Returns the styled flextable.
format_table <- function(tab) {
  # Borders: clear everything, then add thin rules between body rows and
  # heavy rules at the top and bottom of every part.
  styled <- border_remove(tab)
  styled <- border_inner_h(styled, border = std_border, part = "body")
  styled <- hline_top(styled, part = "all", border = big_border)
  styled <- hline_bottom(styled, part = "all", border = big_border)

  # Rule under the second header row; no border is passed, so flextable's
  # default border is used.
  styled <- hline(styled, part = "header", i = 2)

  # Header: merge horizontally repeated labels and set everything in bold.
  styled <- merge_h(styled, part = "header")
  styled <- bold(styled, part = "header")

  # Alignment: center the non-text columns, right-align the text columns.
  styled <- align_nottext_col(styled, align = "center")
  styled <- align_text_col(styled, align = "right")

  # First header row is underlined as well as bold.
  styled <- style(
    styled,
    i = 1,
    part = "header",
    pr_t = fp_text_default(underlined = TRUE, bold = TRUE)
  )

  # Size the columns and rows to fit their content.
  autofit(styled, part = "all")
}

# Rescale proportions to percentages for variables whose label contains the
# (case-sensitive) string "Percent"; all other values pass through unchanged.
#
# prop:  numeric values.
# label: variable labels matched element-wise against "Percent".
# Returns prop * 100 where the label matches and prop otherwise.
prop_to_percent <- function(prop, label) {
  is_percent_label <- str_detect(label, "Percent")
  ifelse(is_percent_label, prop * 100, prop)
}


# Format estimates for display: percentages get 1 digit after the decimal and
# counts get 2.
#
# The two kinds of value are told apart by magnitude alone: anything larger
# than 3 in absolute value is formatted as a percentage. The absolute value is
# used so that negative estimates (e.g. Women - Men differences) get the same
# precision as positive estimates of the same size.
# NOTE(review): the cutoff of 3 presumably reflects the largest possible count
# (three elections, 2016-2020) -- confirm.
#
# x: values to format; coerced with as.numeric().
# Returns the formatted strings, one per element of x; NA inputs stay NA.
format_estimates <- function(x) {
  x <- as.numeric(x)
  ifelse(abs(x) > 3, sprintf("%1.1f", x), sprintf("%1.2f", x))
}

# treatment effects by gender    ======================


# Lookup table for the statistics reported in the tables: display order,
# variable name, and the label shown in the table.
gender_stats_df <- tibble(
  order = c(1, 2, 3, 4),
  name = c(
    "prop_identical_2016",
    "prop_identical_2018",
    "prop_identical_2020",
    "n_identical"
  ),
  lab = c(
    "Percent Identical, 2016",
    "Percent Identical, 2018",
    "Percent Identical, 2020",
    "Number of Identical Elections, 2016-2020"
  )
)

# Step 1: assemble the rows and columns shown in the table.
gender_tab <- estimates_gender_df %>%
  # Keep only the statistics listed in gender_stats_df and attach their label
  # and display order. No `by` is given, so the join uses every column name
  # the two data frames share.
  inner_join(gender_stats_df) %>%
  arrange(order) %>%
  mutate(
    # Anything other than "female" (including NA) is labelled "Men".
    gender = case_when(gender_f == "female" ~ "Women", TRUE ~ "Men"),
    # Proportions become percentages for the "Percent ..." variables.
    across(
      c(
        observed_estimate,
        sim_estimate_unconstrained_mean,
        sim_estimate_mean
      ),
      \(x) prop_to_percent(x, label = lab)
    )
  ) %>%
  select(
    gender,
    lab,
    observed_estimate,
    sim_estimate_unconstrained_mean,
    sim_estimate_mean,
    p_value_unconstrained,
    p_value
  )

# Step 2: convert to a flextable, format the estimates, and label the header.
gender_tab <- gender_tab %>%
  make_table() %>%
  set_formatter(
    observed_estimate = format_estimates,
    sim_estimate_unconstrained_mean = format_estimates,
    sim_estimate_mean = format_estimates
  ) %>%
  # Extra header row with the group labels shared by the Simple/Constrained
  # column pairs.
  add_header(
    sim_estimate_unconstrained_mean = "Permutation Mean",
    sim_estimate_mean = "Permutation Mean",
    p_value_unconstrained = "p-value",
    p_value = "p-value"
  ) %>%
  # Per-column labels.
  set_header_labels(
    gender = "Gender",
    lab = "Variable",
    observed_estimate = "Observed Estimate",
    sim_estimate_unconstrained_mean = "Simple",
    sim_estimate_mean = "Constrained",
    p_value_unconstrained = "Simple",
    p_value = "Constrained"
  ) %>%
  format_table()

gender_tab

# difference in treatment effects between women and men   ======================


# Step 1: assemble the rows and columns shown in the table.
gender_diff_tab <- gender_diff_df %>%
  # Keep only the statistics listed in gender_stats_df and attach their label
  # and display order. No `by` is given, so the join uses every column name
  # the two data frames share.
  inner_join(gender_stats_df) %>%
  arrange(order) %>%
  # Proportions become percentages for the "Percent ..." variables.
  mutate(
    across(
      c(female_minus_male_unconstrained_mean, female_minus_male_mean),
      \(x) prop_to_percent(x, label = lab)
    )
  ) %>%
  select(
    lab,
    female_minus_male_unconstrained_mean,
    female_minus_male_mean,
    female_minus_male_unconstrained_p,
    female_minus_male_p
  )

# Step 2: convert to a flextable, format the estimates, and label the header.
gender_diff_tab <- gender_diff_tab %>%
  make_table() %>%
  set_formatter(
    female_minus_male_unconstrained_mean = format_estimates,
    female_minus_male_mean = format_estimates
  ) %>%
  # Extra header row with the group labels shared by the Simple/Constrained
  # column pairs.
  add_header(
    female_minus_male_unconstrained_mean = "Mean Difference (Women - Men)",
    female_minus_male_mean = "Mean Difference (Women - Men)",
    female_minus_male_unconstrained_p = "p-value",
    female_minus_male_p = "p-value"
  ) %>%
  # Per-column labels.
  set_header_labels(
    lab = "Variable",
    female_minus_male_unconstrained_mean = "Simple",
    female_minus_male_mean = "Constrained",
    female_minus_male_unconstrained_p = "Simple",
    female_minus_male_p = "Constrained"
  ) %>%
  format_table()

gender_diff_tab





# save tables ======================================================

# Write each table to its own Word document; paths are relative to the
# working directory.
walk2(
  list(gender_tab, gender_diff_tab),
  c("Results/gender-table.docx", "Results/gender-diff-table.docx"),
  \(tab, docx_path) save_as_docx(tab, path = docx_path)
)


